library(rmarkdown)
library(dplyr)
library(ggplot2)
library(broom)
library(janitor)
library(renv)
library(purrr)
library(tm)
library(SnowballC)
library(RColorBrewer)
library(ggplot2)
library(wordcloud)
library(biclust)
library(cluster)
library(igraph)
library(fpc)
library(magrittr)
library(rmarkdown)
library(textreuse)
library(slam)
library(plotly)
#packages <- c("tm", "SnowballC", "RColorBrewer", "ggplot2", "wordcloud", "biclust", "cluster", "igraph", "fpc", "knitr", "dplyr", "broom", "janitor", "renv", "purrr")
#install.packages(packages, dependencies = TRUE)
The dataset used for this project consists of US presidential inaugural addresses, obtained from this link.
Using the following Python script, we first created a dataframe of the website’s speeches:
import requests
from bs4 import BeautifulSoup
import pandas as pd
# Scrapes transcripts for inaugural addresses
def get_urls(url):
    """Return the transcript URLs linked from an index page.

    Downloads ``url``, looks up the ``<table class="table">`` element and
    collects the ``href`` of every ``<a>`` found inside it.

    Args:
        url: Address of the page that lists the transcripts.

    Returns:
        list: The ``href`` value of each link in the table.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    # A timeout keeps the scraper from hanging forever on a stalled connection.
    response = requests.get(url, timeout=30)
    # Fail loudly on 4xx/5xx instead of trying to parse an error page.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'lxml')
    links = soup.find("table", class_='table').find_all("a")
    return [link["href"] for link in links]
# Collect the transcript URLs from the inaugural-addresses index page.
urls = get_urls("https://www.presidency.ucsb.edu/documents/presidential-documents-archive-guidebook/inaugural-addresses")
# Start from an empty frame; get_transcripts() returns it extended by one row per URL.
transcripts = pd.DataFrame()
def get_transcripts(urls, transcripts):
    """Scrape each transcript page and add it as a row to ``transcripts``.

    For every URL the page is downloaded and three fields are extracted:
    the text of the ``h3.diet-title`` element (``president``), the second
    comma-separated piece of the ``span.date-display-single`` text
    (``year``) and the text of the ``div.field-docs-content`` element
    (``content``).

    Args:
        urls: Iterable of transcript page URLs.
        transcripts: Existing ``pandas.DataFrame`` to extend. It is not
            modified in place.

    Returns:
        pandas.DataFrame: ``transcripts`` followed by one row per URL, with
        a fresh integer index. When ``urls`` is empty, ``transcripts`` is
        returned unchanged.

    Raises:
        requests.HTTPError: If a page answers with an error status.
    """
    # Collect plain dicts and build the frame once at the end:
    # DataFrame.append was removed in pandas 2.0 and copied the whole frame
    # on every iteration.
    records = []
    for u in urls:
        response = requests.get(u, timeout=30)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'lxml')
        t_president = soup.find("h3", class_="diet-title").text
        # Keep the piece after the first comma of the date text
        # (presumably the year, e.g. "Month day, year" -- confirm on the site).
        t_year = soup.find("span", class_="date-display-single").text.split(',')[1].strip()
        t_content = soup.find("div", class_="field-docs-content").text
        records.append({
            'president': t_president,
            'year': t_year,
            'content': t_content,
        })
    if not records:
        return transcripts
    return pd.concat([transcripts, pd.DataFrame(records)], ignore_index=True)
# Scrape every transcript, then save the result as a pipe-delimited CSV file.
data = get_transcripts(urls,transcripts)
data.to_csv("us_presidents_transcripts.csv", sep="|")
In what follows, we load the dataframe:
# Load the speeches data frame from the project's GitHub repository.
df <- read.csv("https://raw.githubusercontent.com/berserkhmdvhb/MADS-NLP/main/data/presidents-speech.csv")
# Print a compact overview of the columns and their types.
df |> dplyr::glimpse()
## Rows: 59
## Columns: 4
## $ X <int> 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17…
## $ president <chr> "George Washington", "George Washington", "John Adams", "Tho…
## $ year <int> 1789, 1793, 1797, 1801, 1805, 1809, 1813, 1817, 1821, 1825, …
## $ content <chr> "\nFellow-Citizens of the Senate and of the House of Represe…
# Per-column summary statistics of the speeches data frame.
summary(df)
## X president year content
## Min. : 0.0 Length:59 Min. :1789 Length:59
## 1st Qu.:14.5 Class :character 1st Qu.:1847 Class :character
## Median :29.0 Mode :character Median :1905 Mode :character
## Mean :29.0 Mean :1905
## 3rd Qu.:43.5 3rd Qu.:1963
## Max. :58.0 Max. :2021
In what follows, a text file is generated from each row of the dataframe and stored in the “texts” folder:
# presidents <- df[["president"]] |> unique() |> as.list()

# Folder that holds one text file per speech.
texts_dir <- "./data/texts/"

# Build the "<year>-<president>.txt" path for every row of `df`.
# seq_len() skips the loop entirely when `df` has no rows, whereas
# 1:nrow(df) would iterate over 1 and 0.
for (i in seq_len(nrow(df))) {
  df_i <- df[i, ]
  name <- df_i$president
  year <- df_i$year
  text <- df_i$content
  file_name <- paste0(year, "-", name, ".txt")
  loc <- paste0(texts_dir, file_name)
  # Writing is disabled here; uncomment to (re)generate the files.
  # writeLines(text, loc)
}

# From here on `loc` is the folder itself, which the corpus is read from.
loc <- texts_dir
# Read the files under `loc` into a volatile corpus.
docs <- loc |>
  tm::DirSource() |>
  tm::VCorpus()
# List the documents held by the corpus.
summary(docs)
## Length Class Mode
## 1789-George Washington.txt 2 PlainTextDocument list
## 1793-George Washington.txt 2 PlainTextDocument list
## 1797-John Adams.txt 2 PlainTextDocument list
## 1801-Thomas Jefferson.txt 2 PlainTextDocument list
## 1805-Thomas Jefferson.txt 2 PlainTextDocument list
## 1809-James Madison.txt 2 PlainTextDocument list
## 1813-James Madison.txt 2 PlainTextDocument list
## 1817-James Monroe.txt 2 PlainTextDocument list
## 1821-James Monroe.txt 2 PlainTextDocument list
## 1825-John Quincy Adams.txt 2 PlainTextDocument list
## 1829-Andrew Jackson.txt 2 PlainTextDocument list
## 1833-Andrew Jackson.txt 2 PlainTextDocument list
## 1837-Martin van Buren.txt 2 PlainTextDocument list
## 1841-William Henry Harrison.txt 2 PlainTextDocument list
## 1845-James K. Polk.txt 2 PlainTextDocument list
## 1849-Zachary Taylor.txt 2 PlainTextDocument list
## 1853-Franklin Pierce.txt 2 PlainTextDocument list
## 1857-James Buchanan.txt 2 PlainTextDocument list
## 1861-Abraham Lincoln.txt 2 PlainTextDocument list
## 1865-Abraham Lincoln.txt 2 PlainTextDocument list
## 1869-Ulysses S. Grant.txt 2 PlainTextDocument list
## 1873-Ulysses S. Grant.txt 2 PlainTextDocument list
## 1877-Rutherford B. Hayes.txt 2 PlainTextDocument list
## 1881-James A. Garfield.txt 2 PlainTextDocument list
## 1885-Grover Cleveland.txt 2 PlainTextDocument list
## 1889-Benjamin Harrison.txt 2 PlainTextDocument list
## 1893-Grover Cleveland.txt 2 PlainTextDocument list
## 1897-William McKinley.txt 2 PlainTextDocument list
## 1901-William McKinley.txt 2 PlainTextDocument list
## 1905-Theodore Roosevelt.txt 2 PlainTextDocument list
## 1909-William Howard Taft.txt 2 PlainTextDocument list
## 1913-Woodrow Wilson.txt 2 PlainTextDocument list
## 1917-Woodrow Wilson.txt 2 PlainTextDocument list
## 1921-Warren G. Harding.txt 2 PlainTextDocument list
## 1925-Calvin Coolidge.txt 2 PlainTextDocument list
## 1929-Herbert Hoover.txt 2 PlainTextDocument list
## 1933-Franklin D. Roosevelt.txt 2 PlainTextDocument list
## 1937-Franklin D. Roosevelt.txt 2 PlainTextDocument list
## 1941-Franklin D. Roosevelt.txt 2 PlainTextDocument list
## 1945-Franklin D. Roosevelt.txt 2 PlainTextDocument list
## 1949-Harry S. Truman.txt 2 PlainTextDocument list
## 1953-Dwight D. Eisenhower.txt 2 PlainTextDocument list
## 1957-Dwight D. Eisenhower.txt 2 PlainTextDocument list
## 1961-John F. Kennedy.txt 2 PlainTextDocument list
## 1965-Lyndon B. Johnson.txt 2 PlainTextDocument list
## 1969-Richard Nixon.txt 2 PlainTextDocument list
## 1973-Richard Nixon.txt 2 PlainTextDocument list
## 1977-Jimmy Carter.txt 2 PlainTextDocument list
## 1981-Ronald Reagan.txt 2 PlainTextDocument list
## 1985-Ronald Reagan.txt 2 PlainTextDocument list
## 1989-George Bush.txt 2 PlainTextDocument list
## 1993-William J. Clinton.txt 2 PlainTextDocument list
## 1997-William J. Clinton.txt 2 PlainTextDocument list
## 2001-George W. Bush.txt 2 PlainTextDocument list
## 2005-George W. Bush.txt 2 PlainTextDocument list
## 2009-Barack Obama.txt 2 PlainTextDocument list
## 2013-Barack Obama.txt 2 PlainTextDocument list
## 2017-Donald J. Trump.txt 2 PlainTextDocument list
## 2021-Joseph R. Biden.txt 2 PlainTextDocument list
# Show the corpus overview restricted to the first document.
docs[1] |> tm::inspect()
## <<VCorpus>>
## Metadata: corpus specific: 0, document level (indexed): 0
## Content: documents: 1
##
## [[1]]
## <<PlainTextDocument>>
## Metadata: 7
## Content: chars: 8617
writeLines(as.character(docs[1]))## list(list(content = c("", "Fellow-Citizens of the Senate and of the House of Representatives:", "Among the vicissitudes incident to life no event could have filled me with greater anxieties than that of which the notification was transmitted by your order, and received on the 14th day of the present month. On the one hand, I was summoned by my country, whose voice I can never hear but with veneration and love, from a retreat which I had chosen with the fondest predilection, and, in my flattering hopes, with an immutable decision, as the asylum of my declining years—a retreat which was rendered every day more necessary as well as more dear to me by the addition of habit to inclination, and of frequent interruptions in my health to the gradual waste committed on it by time. On the other hand, the magnitude and difficulty of the trust to which the voice of my country called me, being sufficient to awaken in the wisest and most experienced of her citizens a distrustful scrutiny into his qualifications, could not but overwhelm with despondence one who (inheriting inferior endowments from nature and unpracticed in the duties of civil administration) ought to be peculiarly conscious of his own deficiencies. In this conflict of emotions all I dare aver is that it has been my faithful study to collect my duty from a just appreciation of every circumstance by which it might be affected. All I dare hope is that if, in executing this task, I have been too much swayed by a grateful remembrance of former instances, or by an affectionate sensibility to this transcendent proof of the confidence of my fellow-citizens, and have thence too little consulted my incapacity as well as disinclination for the weighty and untried cares before me, my error will be palliated by the motives which mislead [see APP note] me, and its consequences be judged by my country with some share of the partiality in which they originated.",
## "Such being the impressions under which I have, in obedience to the public summons, repaired to the present station, it would be peculiarly improper to omit in this first official act my fervent supplications to that Almighty Being who rules over the universe, who presides in the councils of nations, and whose providential aids can supply every human defect, that His benediction may consecrate to the liberties and happiness of the people of the United States a Government instituted by themselves for these essential purposes, and may enable every instrument employed in its administration to execute with success the functions allotted to his charge. In tendering this homage to the Great Author of every public and private good, I assure myself that it expresses your sentiments not less than my own, nor those of my fellow-citizens at large less than either. No people can be bound to acknowledge and adore the Invisible Hand which conducts the affairs of men more than those of the United States. Every step by which they have advanced to the character of an independent nation seems to have been distinguished by some token of providential agency; and in the important revolution just accomplished in the system of their united government the tranquil deliberations and voluntary consent of so many distinct communities from which the event has resulted can not be compared with the means by which most governments have been established without some return of pious gratitude, along with an humble anticipation of the future blessings which the past seem to presage. These reflections, arising out of the present crisis, have forced themselves too strongly on my mind to be suppressed. You will join with me, I trust, in thinking that there are none under the influence of which the proceedings of a new and free government can more auspiciously commence.",
## "By the article establishing the executive department it is made the duty of the President \"to recommend to your consideration such measures as he shall judge necessary and expedient.\" The circumstances under which I now meet you will acquit me from entering into that subject further than to refer to the great constitutional charter under which you are assembled, and which, in defining your powers, designates the objects to which your attention is to be given. It will be more consistent with those circumstances, and far more congenial with the feelings which actuate me, to substitute, in place of a recommendation of particular measures, the tribute that is due to the talents, the rectitude, and the patriotism which adorn the characters selected to devise and adopt them. In these honorable qualifications I behold the surest pledges that as on one side no local prejudices or attachments, no separate views nor party animosities, will misdirect the comprehensive and equal eye which ought to watch over this great assemblage of communities and interests, so, on another, that the foundation of our national policy will be laid in the pure and immutable principles of private morality, and the preeminence of free government be exemplified by all the attributes which can win the affections of its citizens and command the respect of the world. 
I dwell on this prospect with every satisfaction which an ardent love for my country can inspire, since there is no truth more thoroughly established than that there exists in the economy and course of nature an indissoluble union between virtue and happiness; between duty and advantage; between the genuine maxims of an honest and magnanimous policy and the solid rewards of public prosperity and felicity; since we ought to be no less persuaded that the propitious smiles of Heaven can never be expected on a nation that disregards the eternal rules of order and right which Heaven itself has ordained; and since the preservation of the sacred fire of liberty and the destiny of the republican model of government are justly considered, perhaps, as deeply, as finally, staked on the experiment entrusted to the hands of the American people.",
## "Besides the ordinary objects submitted to your care, it will remain with your judgment to decide how far an exercise of the occasional power delegated by the fifth article of the Constitution is rendered expedient at the present juncture by the nature of objections which have been urged against the system, or by the degree of inquietude which has given birth to them. Instead of undertaking particular recommendations on this subject, in which I could be guided by no lights derived from official opportunities, I shall again give way to my entire confidence in your discernment and pursuit of the public good; for I assure myself that whilst you carefully avoid every alteration which might endanger the benefits of an united and effective government, or which ought to await the future lessons of experience, a reverence for the characteristic rights of freemen and a regard for the public harmony will sufficiently influence your deliberations on the question how far the former can be impregnably fortified or the latter be safely and advantageously promoted.",
## "To the foregoing observations I have one to add, which will be most properly addressed to the House of Representatives. It concerns myself, and will therefore be as brief as possible. When I was first honored with a call into the service of my country, then on the eve of an arduous struggle for its liberties, the light in which I contemplated my duty required that I should renounce every pecuniary compensation. From this resolution I have in no instance departed; and being still under the impressions which produced it, I must decline as inapplicable to myself any share in the personal emoluments which may be indispensably included in a permanent provision for the executive department, and must accordingly pray that the pecuniary estimates for the station in which I am placed may during my continuance in it be limited to such actual expenditures as the public good may be thought to require.",
## "Having thus imparted to you my sentiments as they have been awakened by the occasion which brings us together, I shall take my present leave; but not without resorting once more to the benign Parent of the Human Race in humble supplication that, since He has been pleased to favor the American people with opportunities for deliberating in perfect tranquillity, and dispositions for deciding with unparalleled unanimity on a form of government for the security of their union and the advancement of their happiness, so His divine blessing may be equally conspicuous in the enlarged views, the temperate consultations, and the wise measures on which the success of this Government must depend.",
## ""), meta = list(author = character(0), datetimestamp = list(sec = 13.0839650630951, min = 2, hour = 23, mday = 2, mon = 0, year = 123, wday = 1, yday = 1, isdst = 0), description = character(0), heading = character(0), id = "1789-George Washington.txt", language = "en", origin = character(0))))
## list()
## list()
This project is dedicated to investigating text similarity between the speeches of different US presidents across the years, starting from 1789 and ending with 2021.
In the Preprocessing section, several text mining tasks are applied to all the documents.
In the Word Frequency section, the frequency of different terms in the documents is analyzed and visualized.
In the Doc Similarity section, the similarity between documents is measured, analyzed, and visualized.
In Conclusion, main findings are summarized.
The GitHub repository for this project can be found at this link.
The tm package is a framework for text mining applications within R. Most functions used henceforth stem from this package.
# Strip punctuation characters from every document in the corpus.
docs <- docs |> tm::tm_map(tm::removePunctuation)
writeLines(as.character(docs[1])) ## list(list(content = c("", "FellowCitizens of the Senate and of the House of Representatives", "Among the vicissitudes incident to life no event could have filled me with greater anxieties than that of which the notification was transmitted by your order and received on the 14th day of the present month On the one hand I was summoned by my country whose voice I can never hear but with veneration and love from a retreat which I had chosen with the fondest predilection and in my flattering hopes with an immutable decision as the asylum of my declining years—a retreat which was rendered every day more necessary as well as more dear to me by the addition of habit to inclination and of frequent interruptions in my health to the gradual waste committed on it by time On the other hand the magnitude and difficulty of the trust to which the voice of my country called me being sufficient to awaken in the wisest and most experienced of her citizens a distrustful scrutiny into his qualifications could not but overwhelm with despondence one who inheriting inferior endowments from nature and unpracticed in the duties of civil administration ought to be peculiarly conscious of his own deficiencies In this conflict of emotions all I dare aver is that it has been my faithful study to collect my duty from a just appreciation of every circumstance by which it might be affected All I dare hope is that if in executing this task I have been too much swayed by a grateful remembrance of former instances or by an affectionate sensibility to this transcendent proof of the confidence of my fellowcitizens and have thence too little consulted my incapacity as well as disinclination for the weighty and untried cares before me my error will be palliated by the motives which mislead see APP note me and its consequences be judged by my country with some share of the partiality in which they originated",
## "Such being the impressions under which I have in obedience to the public summons repaired to the present station it would be peculiarly improper to omit in this first official act my fervent supplications to that Almighty Being who rules over the universe who presides in the councils of nations and whose providential aids can supply every human defect that His benediction may consecrate to the liberties and happiness of the people of the United States a Government instituted by themselves for these essential purposes and may enable every instrument employed in its administration to execute with success the functions allotted to his charge In tendering this homage to the Great Author of every public and private good I assure myself that it expresses your sentiments not less than my own nor those of my fellowcitizens at large less than either No people can be bound to acknowledge and adore the Invisible Hand which conducts the affairs of men more than those of the United States Every step by which they have advanced to the character of an independent nation seems to have been distinguished by some token of providential agency and in the important revolution just accomplished in the system of their united government the tranquil deliberations and voluntary consent of so many distinct communities from which the event has resulted can not be compared with the means by which most governments have been established without some return of pious gratitude along with an humble anticipation of the future blessings which the past seem to presage These reflections arising out of the present crisis have forced themselves too strongly on my mind to be suppressed You will join with me I trust in thinking that there are none under the influence of which the proceedings of a new and free government can more auspiciously commence",
## "By the article establishing the executive department it is made the duty of the President to recommend to your consideration such measures as he shall judge necessary and expedient The circumstances under which I now meet you will acquit me from entering into that subject further than to refer to the great constitutional charter under which you are assembled and which in defining your powers designates the objects to which your attention is to be given It will be more consistent with those circumstances and far more congenial with the feelings which actuate me to substitute in place of a recommendation of particular measures the tribute that is due to the talents the rectitude and the patriotism which adorn the characters selected to devise and adopt them In these honorable qualifications I behold the surest pledges that as on one side no local prejudices or attachments no separate views nor party animosities will misdirect the comprehensive and equal eye which ought to watch over this great assemblage of communities and interests so on another that the foundation of our national policy will be laid in the pure and immutable principles of private morality and the preeminence of free government be exemplified by all the attributes which can win the affections of its citizens and command the respect of the world I dwell on this prospect with every satisfaction which an ardent love for my country can inspire since there is no truth more thoroughly established than that there exists in the economy and course of nature an indissoluble union between virtue and happiness between duty and advantage between the genuine maxims of an honest and magnanimous policy and the solid rewards of public prosperity and felicity since we ought to be no less persuaded that the propitious smiles of Heaven can never be expected on a nation that disregards the eternal rules of order and right which Heaven itself has ordained and since the preservation of the sacred fire of liberty and 
the destiny of the republican model of government are justly considered perhaps as deeply as finally staked on the experiment entrusted to the hands of the American people",
## "Besides the ordinary objects submitted to your care it will remain with your judgment to decide how far an exercise of the occasional power delegated by the fifth article of the Constitution is rendered expedient at the present juncture by the nature of objections which have been urged against the system or by the degree of inquietude which has given birth to them Instead of undertaking particular recommendations on this subject in which I could be guided by no lights derived from official opportunities I shall again give way to my entire confidence in your discernment and pursuit of the public good for I assure myself that whilst you carefully avoid every alteration which might endanger the benefits of an united and effective government or which ought to await the future lessons of experience a reverence for the characteristic rights of freemen and a regard for the public harmony will sufficiently influence your deliberations on the question how far the former can be impregnably fortified or the latter be safely and advantageously promoted",
## "To the foregoing observations I have one to add which will be most properly addressed to the House of Representatives It concerns myself and will therefore be as brief as possible When I was first honored with a call into the service of my country then on the eve of an arduous struggle for its liberties the light in which I contemplated my duty required that I should renounce every pecuniary compensation From this resolution I have in no instance departed and being still under the impressions which produced it I must decline as inapplicable to myself any share in the personal emoluments which may be indispensably included in a permanent provision for the executive department and must accordingly pray that the pecuniary estimates for the station in which I am placed may during my continuance in it be limited to such actual expenditures as the public good may be thought to require",
## "Having thus imparted to you my sentiments as they have been awakened by the occasion which brings us together I shall take my present leave but not without resorting once more to the benign Parent of the Human Race in humble supplication that since He has been pleased to favor the American people with opportunities for deliberating in perfect tranquillity and dispositions for deciding with unparalleled unanimity on a form of government for the security of their union and the advancement of their happiness so His divine blessing may be equally conspicuous in the enlarged views the temperate consultations and the wise measures on which the success of this Government must depend",
## ""), meta = list(author = character(0), datetimestamp = list(sec = 13.0839650630951, min = 2, hour = 23, mday = 2, mon = 0, year = 123, wday = 1, yday = 1, isdst = 0), description = character(0), heading = character(0), id = "1789-George Washington.txt", language = "en", origin = character(0))))
## list()
## list()
# Replace a few separator-like characters with a space in every document.
#
# gsub() is wrapped in content_transformer() so that only the text of each
# document is edited. Assigning gsub()'s result straight to docs[[j]]
# replaced every PlainTextDocument with a bare character vector and
# dropped its metadata (id, language, timestamp, ...).
to_space <- tm::content_transformer(function(x, pattern) {
  gsub(pattern, " ", x)
})

# "\u2028" is the Unicode LINE SEPARATOR (it is not an ASCII character).
# NOTE(review): removePunctuation() has already run at this point and
# presumably stripped "/", "@" and "|", so only the "\u2028" replacement is
# expected to match -- confirm whether this step should run earlier.
for (pattern in c("/", "@", "\\|", "\u2028")) {
  docs <- tm::tm_map(docs, to_space, pattern)
}
writeLines(as.character(docs[1]))## list(c("", "FellowCitizens of the Senate and of the House of Representatives", "Among the vicissitudes incident to life no event could have filled me with greater anxieties than that of which the notification was transmitted by your order and received on the 14th day of the present month On the one hand I was summoned by my country whose voice I can never hear but with veneration and love from a retreat which I had chosen with the fondest predilection and in my flattering hopes with an immutable decision as the asylum of my declining years—a retreat which was rendered every day more necessary as well as more dear to me by the addition of habit to inclination and of frequent interruptions in my health to the gradual waste committed on it by time On the other hand the magnitude and difficulty of the trust to which the voice of my country called me being sufficient to awaken in the wisest and most experienced of her citizens a distrustful scrutiny into his qualifications could not but overwhelm with despondence one who inheriting inferior endowments from nature and unpracticed in the duties of civil administration ought to be peculiarly conscious of his own deficiencies In this conflict of emotions all I dare aver is that it has been my faithful study to collect my duty from a just appreciation of every circumstance by which it might be affected All I dare hope is that if in executing this task I have been too much swayed by a grateful remembrance of former instances or by an affectionate sensibility to this transcendent proof of the confidence of my fellowcitizens and have thence too little consulted my incapacity as well as disinclination for the weighty and untried cares before me my error will be palliated by the motives which mislead see APP note me and its consequences be judged by my country with some share of the partiality in which they originated",
## "Such being the impressions under which I have in obedience to the public summons repaired to the present station it would be peculiarly improper to omit in this first official act my fervent supplications to that Almighty Being who rules over the universe who presides in the councils of nations and whose providential aids can supply every human defect that His benediction may consecrate to the liberties and happiness of the people of the United States a Government instituted by themselves for these essential purposes and may enable every instrument employed in its administration to execute with success the functions allotted to his charge In tendering this homage to the Great Author of every public and private good I assure myself that it expresses your sentiments not less than my own nor those of my fellowcitizens at large less than either No people can be bound to acknowledge and adore the Invisible Hand which conducts the affairs of men more than those of the United States Every step by which they have advanced to the character of an independent nation seems to have been distinguished by some token of providential agency and in the important revolution just accomplished in the system of their united government the tranquil deliberations and voluntary consent of so many distinct communities from which the event has resulted can not be compared with the means by which most governments have been established without some return of pious gratitude along with an humble anticipation of the future blessings which the past seem to presage These reflections arising out of the present crisis have forced themselves too strongly on my mind to be suppressed You will join with me I trust in thinking that there are none under the influence of which the proceedings of a new and free government can more auspiciously commence",
## "By the article establishing the executive department it is made the duty of the President to recommend to your consideration such measures as he shall judge necessary and expedient The circumstances under which I now meet you will acquit me from entering into that subject further than to refer to the great constitutional charter under which you are assembled and which in defining your powers designates the objects to which your attention is to be given It will be more consistent with those circumstances and far more congenial with the feelings which actuate me to substitute in place of a recommendation of particular measures the tribute that is due to the talents the rectitude and the patriotism which adorn the characters selected to devise and adopt them In these honorable qualifications I behold the surest pledges that as on one side no local prejudices or attachments no separate views nor party animosities will misdirect the comprehensive and equal eye which ought to watch over this great assemblage of communities and interests so on another that the foundation of our national policy will be laid in the pure and immutable principles of private morality and the preeminence of free government be exemplified by all the attributes which can win the affections of its citizens and command the respect of the world I dwell on this prospect with every satisfaction which an ardent love for my country can inspire since there is no truth more thoroughly established than that there exists in the economy and course of nature an indissoluble union between virtue and happiness between duty and advantage between the genuine maxims of an honest and magnanimous policy and the solid rewards of public prosperity and felicity since we ought to be no less persuaded that the propitious smiles of Heaven can never be expected on a nation that disregards the eternal rules of order and right which Heaven itself has ordained and since the preservation of the sacred fire of liberty and 
the destiny of the republican model of government are justly considered perhaps as deeply as finally staked on the experiment entrusted to the hands of the American people",
## "Besides the ordinary objects submitted to your care it will remain with your judgment to decide how far an exercise of the occasional power delegated by the fifth article of the Constitution is rendered expedient at the present juncture by the nature of objections which have been urged against the system or by the degree of inquietude which has given birth to them Instead of undertaking particular recommendations on this subject in which I could be guided by no lights derived from official opportunities I shall again give way to my entire confidence in your discernment and pursuit of the public good for I assure myself that whilst you carefully avoid every alteration which might endanger the benefits of an united and effective government or which ought to await the future lessons of experience a reverence for the characteristic rights of freemen and a regard for the public harmony will sufficiently influence your deliberations on the question how far the former can be impregnably fortified or the latter be safely and advantageously promoted",
## "To the foregoing observations I have one to add which will be most properly addressed to the House of Representatives It concerns myself and will therefore be as brief as possible When I was first honored with a call into the service of my country then on the eve of an arduous struggle for its liberties the light in which I contemplated my duty required that I should renounce every pecuniary compensation From this resolution I have in no instance departed and being still under the impressions which produced it I must decline as inapplicable to myself any share in the personal emoluments which may be indispensably included in a permanent provision for the executive department and must accordingly pray that the pecuniary estimates for the station in which I am placed may during my continuance in it be limited to such actual expenditures as the public good may be thought to require",
## "Having thus imparted to you my sentiments as they have been awakened by the occasion which brings us together I shall take my present leave but not without resorting once more to the benign Parent of the Human Race in humble supplication that since He has been pleased to favor the American people with opportunities for deliberating in perfect tranquillity and dispositions for deciding with unparalleled unanimity on a form of government for the security of their union and the advancement of their happiness so His divine blessing may be equally conspicuous in the enlarged views the temperate consultations and the wise measures on which the success of this Government must depend",
## ""))
## list()
## list()
# Strip digits from every document in the corpus.
docs <- docs |> tm::tm_map(removeNumbers)
writeLines(as.character(docs[1])) ## list(c("", "FellowCitizens of the Senate and of the House of Representatives", "Among the vicissitudes incident to life no event could have filled me with greater anxieties than that of which the notification was transmitted by your order and received on the th day of the present month On the one hand I was summoned by my country whose voice I can never hear but with veneration and love from a retreat which I had chosen with the fondest predilection and in my flattering hopes with an immutable decision as the asylum of my declining years—a retreat which was rendered every day more necessary as well as more dear to me by the addition of habit to inclination and of frequent interruptions in my health to the gradual waste committed on it by time On the other hand the magnitude and difficulty of the trust to which the voice of my country called me being sufficient to awaken in the wisest and most experienced of her citizens a distrustful scrutiny into his qualifications could not but overwhelm with despondence one who inheriting inferior endowments from nature and unpracticed in the duties of civil administration ought to be peculiarly conscious of his own deficiencies In this conflict of emotions all I dare aver is that it has been my faithful study to collect my duty from a just appreciation of every circumstance by which it might be affected All I dare hope is that if in executing this task I have been too much swayed by a grateful remembrance of former instances or by an affectionate sensibility to this transcendent proof of the confidence of my fellowcitizens and have thence too little consulted my incapacity as well as disinclination for the weighty and untried cares before me my error will be palliated by the motives which mislead see APP note me and its consequences be judged by my country with some share of the partiality in which they originated",
## "Such being the impressions under which I have in obedience to the public summons repaired to the present station it would be peculiarly improper to omit in this first official act my fervent supplications to that Almighty Being who rules over the universe who presides in the councils of nations and whose providential aids can supply every human defect that His benediction may consecrate to the liberties and happiness of the people of the United States a Government instituted by themselves for these essential purposes and may enable every instrument employed in its administration to execute with success the functions allotted to his charge In tendering this homage to the Great Author of every public and private good I assure myself that it expresses your sentiments not less than my own nor those of my fellowcitizens at large less than either No people can be bound to acknowledge and adore the Invisible Hand which conducts the affairs of men more than those of the United States Every step by which they have advanced to the character of an independent nation seems to have been distinguished by some token of providential agency and in the important revolution just accomplished in the system of their united government the tranquil deliberations and voluntary consent of so many distinct communities from which the event has resulted can not be compared with the means by which most governments have been established without some return of pious gratitude along with an humble anticipation of the future blessings which the past seem to presage These reflections arising out of the present crisis have forced themselves too strongly on my mind to be suppressed You will join with me I trust in thinking that there are none under the influence of which the proceedings of a new and free government can more auspiciously commence",
## "By the article establishing the executive department it is made the duty of the President to recommend to your consideration such measures as he shall judge necessary and expedient The circumstances under which I now meet you will acquit me from entering into that subject further than to refer to the great constitutional charter under which you are assembled and which in defining your powers designates the objects to which your attention is to be given It will be more consistent with those circumstances and far more congenial with the feelings which actuate me to substitute in place of a recommendation of particular measures the tribute that is due to the talents the rectitude and the patriotism which adorn the characters selected to devise and adopt them In these honorable qualifications I behold the surest pledges that as on one side no local prejudices or attachments no separate views nor party animosities will misdirect the comprehensive and equal eye which ought to watch over this great assemblage of communities and interests so on another that the foundation of our national policy will be laid in the pure and immutable principles of private morality and the preeminence of free government be exemplified by all the attributes which can win the affections of its citizens and command the respect of the world I dwell on this prospect with every satisfaction which an ardent love for my country can inspire since there is no truth more thoroughly established than that there exists in the economy and course of nature an indissoluble union between virtue and happiness between duty and advantage between the genuine maxims of an honest and magnanimous policy and the solid rewards of public prosperity and felicity since we ought to be no less persuaded that the propitious smiles of Heaven can never be expected on a nation that disregards the eternal rules of order and right which Heaven itself has ordained and since the preservation of the sacred fire of liberty and 
the destiny of the republican model of government are justly considered perhaps as deeply as finally staked on the experiment entrusted to the hands of the American people",
## "Besides the ordinary objects submitted to your care it will remain with your judgment to decide how far an exercise of the occasional power delegated by the fifth article of the Constitution is rendered expedient at the present juncture by the nature of objections which have been urged against the system or by the degree of inquietude which has given birth to them Instead of undertaking particular recommendations on this subject in which I could be guided by no lights derived from official opportunities I shall again give way to my entire confidence in your discernment and pursuit of the public good for I assure myself that whilst you carefully avoid every alteration which might endanger the benefits of an united and effective government or which ought to await the future lessons of experience a reverence for the characteristic rights of freemen and a regard for the public harmony will sufficiently influence your deliberations on the question how far the former can be impregnably fortified or the latter be safely and advantageously promoted",
## "To the foregoing observations I have one to add which will be most properly addressed to the House of Representatives It concerns myself and will therefore be as brief as possible When I was first honored with a call into the service of my country then on the eve of an arduous struggle for its liberties the light in which I contemplated my duty required that I should renounce every pecuniary compensation From this resolution I have in no instance departed and being still under the impressions which produced it I must decline as inapplicable to myself any share in the personal emoluments which may be indispensably included in a permanent provision for the executive department and must accordingly pray that the pecuniary estimates for the station in which I am placed may during my continuance in it be limited to such actual expenditures as the public good may be thought to require",
## "Having thus imparted to you my sentiments as they have been awakened by the occasion which brings us together I shall take my present leave but not without resorting once more to the benign Parent of the Human Race in humble supplication that since He has been pleased to favor the American people with opportunities for deliberating in perfect tranquillity and dispositions for deciding with unparalleled unanimity on a form of government for the security of their union and the advancement of their happiness so His divine blessing may be equally conspicuous in the enlarged views the temperate consultations and the wise measures on which the success of this Government must depend",
## ""))
## list()
## list()
# Lower-case every document, then re-wrap each element as a
# PlainTextDocument (base tolower() hands back plain character vectors).
docs <- docs |>
  tm::tm_map(tolower) |>
  tm::tm_map(PlainTextDocument)
# Keep a copy of the corpus as it stands after lower-casing.
DocsCopy <- docs
writeLines(as.character(docs[1])) ## list(list(content = c("", "fellowcitizens of the senate and of the house of representatives", "among the vicissitudes incident to life no event could have filled me with greater anxieties than that of which the notification was transmitted by your order and received on the th day of the present month on the one hand i was summoned by my country whose voice i can never hear but with veneration and love from a retreat which i had chosen with the fondest predilection and in my flattering hopes with an immutable decision as the asylum of my declining years—a retreat which was rendered every day more necessary as well as more dear to me by the addition of habit to inclination and of frequent interruptions in my health to the gradual waste committed on it by time on the other hand the magnitude and difficulty of the trust to which the voice of my country called me being sufficient to awaken in the wisest and most experienced of her citizens a distrustful scrutiny into his qualifications could not but overwhelm with despondence one who inheriting inferior endowments from nature and unpracticed in the duties of civil administration ought to be peculiarly conscious of his own deficiencies in this conflict of emotions all i dare aver is that it has been my faithful study to collect my duty from a just appreciation of every circumstance by which it might be affected all i dare hope is that if in executing this task i have been too much swayed by a grateful remembrance of former instances or by an affectionate sensibility to this transcendent proof of the confidence of my fellowcitizens and have thence too little consulted my incapacity as well as disinclination for the weighty and untried cares before me my error will be palliated by the motives which mislead see app note me and its consequences be judged by my country with some share of the partiality in which they originated",
## "such being the impressions under which i have in obedience to the public summons repaired to the present station it would be peculiarly improper to omit in this first official act my fervent supplications to that almighty being who rules over the universe who presides in the councils of nations and whose providential aids can supply every human defect that his benediction may consecrate to the liberties and happiness of the people of the united states a government instituted by themselves for these essential purposes and may enable every instrument employed in its administration to execute with success the functions allotted to his charge in tendering this homage to the great author of every public and private good i assure myself that it expresses your sentiments not less than my own nor those of my fellowcitizens at large less than either no people can be bound to acknowledge and adore the invisible hand which conducts the affairs of men more than those of the united states every step by which they have advanced to the character of an independent nation seems to have been distinguished by some token of providential agency and in the important revolution just accomplished in the system of their united government the tranquil deliberations and voluntary consent of so many distinct communities from which the event has resulted can not be compared with the means by which most governments have been established without some return of pious gratitude along with an humble anticipation of the future blessings which the past seem to presage these reflections arising out of the present crisis have forced themselves too strongly on my mind to be suppressed you will join with me i trust in thinking that there are none under the influence of which the proceedings of a new and free government can more auspiciously commence",
## "by the article establishing the executive department it is made the duty of the president to recommend to your consideration such measures as he shall judge necessary and expedient the circumstances under which i now meet you will acquit me from entering into that subject further than to refer to the great constitutional charter under which you are assembled and which in defining your powers designates the objects to which your attention is to be given it will be more consistent with those circumstances and far more congenial with the feelings which actuate me to substitute in place of a recommendation of particular measures the tribute that is due to the talents the rectitude and the patriotism which adorn the characters selected to devise and adopt them in these honorable qualifications i behold the surest pledges that as on one side no local prejudices or attachments no separate views nor party animosities will misdirect the comprehensive and equal eye which ought to watch over this great assemblage of communities and interests so on another that the foundation of our national policy will be laid in the pure and immutable principles of private morality and the preeminence of free government be exemplified by all the attributes which can win the affections of its citizens and command the respect of the world i dwell on this prospect with every satisfaction which an ardent love for my country can inspire since there is no truth more thoroughly established than that there exists in the economy and course of nature an indissoluble union between virtue and happiness between duty and advantage between the genuine maxims of an honest and magnanimous policy and the solid rewards of public prosperity and felicity since we ought to be no less persuaded that the propitious smiles of heaven can never be expected on a nation that disregards the eternal rules of order and right which heaven itself has ordained and since the preservation of the sacred fire of liberty and 
the destiny of the republican model of government are justly considered perhaps as deeply as finally staked on the experiment entrusted to the hands of the american people",
## "besides the ordinary objects submitted to your care it will remain with your judgment to decide how far an exercise of the occasional power delegated by the fifth article of the constitution is rendered expedient at the present juncture by the nature of objections which have been urged against the system or by the degree of inquietude which has given birth to them instead of undertaking particular recommendations on this subject in which i could be guided by no lights derived from official opportunities i shall again give way to my entire confidence in your discernment and pursuit of the public good for i assure myself that whilst you carefully avoid every alteration which might endanger the benefits of an united and effective government or which ought to await the future lessons of experience a reverence for the characteristic rights of freemen and a regard for the public harmony will sufficiently influence your deliberations on the question how far the former can be impregnably fortified or the latter be safely and advantageously promoted",
## "to the foregoing observations i have one to add which will be most properly addressed to the house of representatives it concerns myself and will therefore be as brief as possible when i was first honored with a call into the service of my country then on the eve of an arduous struggle for its liberties the light in which i contemplated my duty required that i should renounce every pecuniary compensation from this resolution i have in no instance departed and being still under the impressions which produced it i must decline as inapplicable to myself any share in the personal emoluments which may be indispensably included in a permanent provision for the executive department and must accordingly pray that the pecuniary estimates for the station in which i am placed may during my continuance in it be limited to such actual expenditures as the public good may be thought to require",
## "having thus imparted to you my sentiments as they have been awakened by the occasion which brings us together i shall take my present leave but not without resorting once more to the benign parent of the human race in humble supplication that since he has been pleased to favor the american people with opportunities for deliberating in perfect tranquillity and dispositions for deciding with unparalleled unanimity on a form of government for the security of their union and the advancement of their happiness so his divine blessing may be equally conspicuous in the enlarged views the temperate consultations and the wise measures on which the success of this government must depend",
## ""), meta = list(author = character(0), datetimestamp = list(sec = 13.3212623596191, min = 2, hour = 23, mday = 2, mon = 0, year = 123, wday = 1, yday = 1, isdst = 0), description = character(0), heading = character(0), id = character(0), language = character(0), origin = character(0))))
## list()
## list()
# Size of tm's built-in English stopword list:
length(stopwords("english")) ## [1] 174
# Drop those stopwords from every document, then re-wrap the results as
# PlainTextDocument objects.
docs <- docs |>
  tm::tm_map(removeWords, stopwords("english")) |>
  tm::tm_map(PlainTextDocument)
writeLines(as.character(docs[1]))## list(list(content = c("", "fellowcitizens senate house representatives", "among vicissitudes incident life event filled greater anxieties notification transmitted order received th day present month one hand summoned country whose voice can never hear veneration love retreat chosen fondest predilection flattering hopes immutable decision asylum declining years— retreat rendered every day necessary well dear addition habit inclination frequent interruptions health gradual waste committed time hand magnitude difficulty trust voice country called sufficient awaken wisest experienced citizens distrustful scrutiny qualifications overwhelm despondence one inheriting inferior endowments nature unpracticed duties civil administration peculiarly conscious deficiencies conflict emotions dare aver faithful study collect duty just appreciation every circumstance might affected dare hope executing task much swayed grateful remembrance former instances affectionate sensibility transcendent proof confidence fellowcitizens thence little consulted incapacity well disinclination weighty untried cares error will palliated motives mislead see app note consequences judged country share partiality originated",
## " impressions obedience public summons repaired present station peculiarly improper omit first official act fervent supplications almighty rules universe presides councils nations whose providential aids can supply every human defect benediction may consecrate liberties happiness people united states government instituted essential purposes may enable every instrument employed administration execute success functions allotted charge tendering homage great author every public private good assure expresses sentiments less fellowcitizens large less either people can bound acknowledge adore invisible hand conducts affairs men united states every step advanced character independent nation seems distinguished token providential agency important revolution just accomplished system united government tranquil deliberations voluntary consent many distinct communities event resulted can compared means governments established without return pious gratitude along humble anticipation future blessings past seem presage reflections arising present crisis forced strongly mind suppressed will join trust thinking none influence proceedings new free government can auspiciously commence",
## " article establishing executive department made duty president recommend consideration measures shall judge necessary expedient circumstances now meet will acquit entering subject refer great constitutional charter assembled defining powers designates objects attention given will consistent circumstances far congenial feelings actuate substitute place recommendation particular measures tribute due talents rectitude patriotism adorn characters selected devise adopt honorable qualifications behold surest pledges one side local prejudices attachments separate views party animosities will misdirect comprehensive equal eye watch great assemblage communities interests another foundation national policy will laid pure immutable principles private morality preeminence free government exemplified attributes can win affections citizens command respect world dwell prospect every satisfaction ardent love country can inspire since truth thoroughly established exists economy course nature indissoluble union virtue happiness duty advantage genuine maxims honest magnanimous policy solid rewards public prosperity felicity since less persuaded propitious smiles heaven can never expected nation disregards eternal rules order right heaven ordained since preservation sacred fire liberty destiny republican model government justly considered perhaps deeply finally staked experiment entrusted hands american people",
## "besides ordinary objects submitted care will remain judgment decide far exercise occasional power delegated fifth article constitution rendered expedient present juncture nature objections urged system degree inquietude given birth instead undertaking particular recommendations subject guided lights derived official opportunities shall give way entire confidence discernment pursuit public good assure whilst carefully avoid every alteration might endanger benefits united effective government await future lessons experience reverence characteristic rights freemen regard public harmony will sufficiently influence deliberations question far former can impregnably fortified latter safely advantageously promoted",
## " foregoing observations one add will properly addressed house representatives concerns will therefore brief possible first honored call service country eve arduous struggle liberties light contemplated duty required renounce every pecuniary compensation resolution instance departed still impressions produced must decline inapplicable share personal emoluments may indispensably included permanent provision executive department must accordingly pray pecuniary estimates station placed may continuance limited actual expenditures public good may thought require",
## " thus imparted sentiments awakened occasion brings us together shall take present leave without resorting benign parent human race humble supplication since pleased favor american people opportunities deliberating perfect tranquillity dispositions deciding unparalleled unanimity form government security union advancement happiness divine blessing may equally conspicuous enlarged views temperate consultations wise measures success government must depend",
## ""), meta = list(author = character(0), datetimestamp = list(sec = 13.7044150829315, min = 2, hour = 23, mday = 2, mon = 0, year = 123, wday = 1, yday = 1, isdst = 0), description = character(0), heading = character(0), id = character(0), language = character(0), origin = character(0))))
## list()
## list()
#docs <- tm::tm_map(docs, removeWords, c("syllogism", "tautology"))
# Just remove the words "syllogism" and "tautology".
# These words don't actually exist in these texts, but this is how you would remove them if they did.

If you wish to preserve a concept that is only apparent as a collection of two or more words, then you can combine them or reduce them to a meaningful acronym before you begin the analysis. Here, I am using examples that are particular to qualitative data analysis.
# Join multi-word concepts into single tokens so that each one is preserved
# as a unit. Names are the phrases to search for; values are the text they
# are replaced with.
phrase_map <- c(
  "fake news" = "fake_news",
  "inner city" = "inner-city",
  "politically correct" = "politically_correct"
)
# content_transformer() rewrites only the text of each document and hands
# the document object back, so corpus elements are never overwritten with
# bare character vectors. The phrases are literal text, hence fixed = TRUE.
# NOTE(review): this expects the elements of `docs` to be text documents;
# they are re-wrapped as PlainTextDocument after the stopword removal.
join_phrases <- content_transformer(function(txt) {
  for (phrase in names(phrase_map)) {
    txt <- gsub(phrase, phrase_map[[phrase]], txt, fixed = TRUE)
  }
  txt
})
docs <- tm_map(docs, join_phrases)
docs <- tm_map(docs, PlainTextDocument)

Removing common word endings (e.g. “ing”, “es”, “s”):
## Note (original author): this section was not run for this particular example.
# Stem every document and re-wrap the results as PlainTextDocument objects.
# The stemmed corpus is stored separately in `docs_st`; `docs` itself is
# left unstemmed.
docs_st <- docs |>
  tm_map(stemDocument) |>
  tm_map(PlainTextDocument)
writeLines(as.character(docs_st[1])) # Check to see if it worked.## list(list(content = c("", "fellowcitizen senat hous repres", "among vicissitud incid life event fill greater anxieti notif transmit order receiv th day present month one hand summon countri whose voic can never hear vener love retreat chosen fondest predilect flatter hope immut decis asylum declin years— retreat render everi day necessari well dear addit habit inclin frequent interrupt health gradual wast commit time hand magnitud difficulti trust voic countri call suffici awaken wisest experienc citizen distrust scrutini qualif overwhelm despond one inherit inferior endow natur unpract duti civil administr peculiar conscious defici conflict emot dare aver faith studi collect duti just appreci everi circumst might affect dare hope execut task much sway grate remembr former instanc affection sensibl transcend proof confid fellowcitizen thenc littl consult incapac well disinclin weighti untri care error will palliat motiv mislead see app note consequ judg countri share partial origin",
## "impress obedi public summon repair present station peculiar improp omit first offici act fervent supplic almighti rule univers presid council nation whose providenti aid can suppli everi human defect benedict may consecr liberti happi peopl unit state govern institut essenti purpos may enabl everi instrument employ administr execut success function allot charg tender homag great author everi public privat good assur express sentiment less fellowcitizen larg less either peopl can bound acknowledg ador invis hand conduct affair men unit state everi step advanc charact independ nation seem distinguish token providenti agenc import revolut just accomplish system unit govern tranquil deliber voluntari consent mani distinct communiti event result can compar mean govern establish without return pious gratitud along humbl anticip futur bless past seem presag reflect aris present crisi forc strong mind suppress will join trust think none influenc proceed new free govern can auspici commenc",
## "articl establish execut depart made duti presid recommend consider measur shall judg necessari expedi circumst now meet will acquit enter subject refer great constitut charter assembl defin power design object attent given will consist circumst far congeni feel actuat substitut place recommend particular measur tribut due talent rectitud patriot adorn charact select devis adopt honor qualif behold surest pledg one side local prejudic attach separ view parti animos will misdirect comprehens equal eye watch great assemblag communiti interest anoth foundat nation polici will laid pure immut principl privat moral preemin free govern exemplifi attribut can win affect citizen command respect world dwell prospect everi satisfact ardent love countri can inspir sinc truth thorough establish exist economi cours natur indissolubl union virtu happi duti advantag genuin maxim honest magnanim polici solid reward public prosper felic sinc less persuad propiti smile heaven can never expect nation disregard etern rule order right heaven ordain sinc preserv sacr fire liberti destini republican model govern just consid perhap deepli final stake experi entrust hand american peopl",
## "besid ordinari object submit care will remain judgment decid far exercis occasion power deleg fifth articl constitut render expedi present junctur natur object urg system degre inquietud given birth instead undertak particular recommend subject guid light deriv offici opportun shall give way entir confid discern pursuit public good assur whilst care avoid everi alter might endang benefit unit effect govern await futur lesson experi rever characterist right freemen regard public harmoni will suffici influenc deliber question far former can impregn fortifi latter safe advantag promot",
## "forego observ one add will proper address hous repres concern will therefor brief possibl first honor call servic countri eve arduous struggl liberti light contempl duti requir renounc everi pecuniari compens resolut instanc depart still impress produc must declin inapplic share person emolu may indispens includ perman provis execut depart must accord pray pecuniari estim station place may continu limit actual expenditur public good may thought requir", "thus impart sentiment awaken occas bring us togeth shall take present leav without resort benign parent human race humbl supplic sinc pleas favor american peopl opportun deliber perfect tranquil disposit decid unparallel unanim form govern secur union advanc happi divin bless may equal conspicu enlarg view temper consult wise measur success govern must depend",
## ""), meta = list(author = character(0), datetimestamp = list(sec = 13.9692857265472, min = 2, hour = 23, mday = 2, mon = 0, year = 123, wday = 1, yday = 1, isdst = 0), description = character(0), heading = character(0), id = character(0), language = character(0), origin = character(0))))
## list()
## list()
# Optional: uncomment to continue with the stemmed corpus instead.
# docs <- docs_st

# Collapse runs of whitespace in every document into a single space.
# (This statement had been fused onto the comment line above by a lost line
# break, which turned it into a comment so that it never ran.)
docs <- tm_map(docs, stripWhitespace)
writeLines(as.character(docs[1]))## list(list(content = c("", "fellowcitizens senate house representatives", "among vicissitudes incident life event filled greater anxieties notification transmitted order received th day present month one hand summoned country whose voice can never hear veneration love retreat chosen fondest predilection flattering hopes immutable decision asylum declining years— retreat rendered every day necessary well dear addition habit inclination frequent interruptions health gradual waste committed time hand magnitude difficulty trust voice country called sufficient awaken wisest experienced citizens distrustful scrutiny qualifications overwhelm despondence one inheriting inferior endowments nature unpracticed duties civil administration peculiarly conscious deficiencies conflict emotions dare aver faithful study collect duty just appreciation every circumstance might affected dare hope executing task much swayed grateful remembrance former instances affectionate sensibility transcendent proof confidence fellowcitizens thence little consulted incapacity well disinclination weighty untried cares error will palliated motives mislead see app note consequences judged country share partiality originated",
## " impressions obedience public summons repaired present station peculiarly improper omit first official act fervent supplications almighty rules universe presides councils nations whose providential aids can supply every human defect benediction may consecrate liberties happiness people united states government instituted essential purposes may enable every instrument employed administration execute success functions allotted charge tendering homage great author every public private good assure expresses sentiments less fellowcitizens large less either people can bound acknowledge adore invisible hand conducts affairs men united states every step advanced character independent nation seems distinguished token providential agency important revolution just accomplished system united government tranquil deliberations voluntary consent many distinct communities event resulted can compared means governments established without return pious gratitude along humble anticipation future blessings past seem presage reflections arising present crisis forced strongly mind suppressed will join trust thinking none influence proceedings new free government can auspiciously commence",
## " article establishing executive department made duty president recommend consideration measures shall judge necessary expedient circumstances now meet will acquit entering subject refer great constitutional charter assembled defining powers designates objects attention given will consistent circumstances far congenial feelings actuate substitute place recommendation particular measures tribute due talents rectitude patriotism adorn characters selected devise adopt honorable qualifications behold surest pledges one side local prejudices attachments separate views party animosities will misdirect comprehensive equal eye watch great assemblage communities interests another foundation national policy will laid pure immutable principles private morality preeminence free government exemplified attributes can win affections citizens command respect world dwell prospect every satisfaction ardent love country can inspire since truth thoroughly established exists economy course nature indissoluble union virtue happiness duty advantage genuine maxims honest magnanimous policy solid rewards public prosperity felicity since less persuaded propitious smiles heaven can never expected nation disregards eternal rules order right heaven ordained since preservation sacred fire liberty destiny republican model government justly considered perhaps deeply finally staked experiment entrusted hands american people",
## "besides ordinary objects submitted care will remain judgment decide far exercise occasional power delegated fifth article constitution rendered expedient present juncture nature objections urged system degree inquietude given birth instead undertaking particular recommendations subject guided lights derived official opportunities shall give way entire confidence discernment pursuit public good assure whilst carefully avoid every alteration might endanger benefits united effective government await future lessons experience reverence characteristic rights freemen regard public harmony will sufficiently influence deliberations question far former can impregnably fortified latter safely advantageously promoted",
## " foregoing observations one add will properly addressed house representatives concerns will therefore brief possible first honored call service country eve arduous struggle liberties light contemplated duty required renounce every pecuniary compensation resolution instance departed still impressions produced must decline inapplicable share personal emoluments may indispensably included permanent provision executive department must accordingly pray pecuniary estimates station placed may continuance limited actual expenditures public good may thought require",
## " thus imparted sentiments awakened occasion brings us together shall take present leave without resorting benign parent human race humble supplication since pleased favor american people opportunities deliberating perfect tranquillity dispositions deciding unparalleled unanimity form government security union advancement happiness divine blessing may equally conspicuous enlarged views temperate consultations wise measures success government must depend", ""), meta = list(author = character(0), datetimestamp = list(
## sec = 13.7956876754761, min = 2, hour = 23, mday = 2, mon = 0, year = 123, wday = 1, yday = 1, isdst = 0), description = character(0), heading = character(0), id = character(0), language = character(0), origin = character(0))))
## list()
## list()
Once preprocessing is complete, run the following script. It collapses each run of whitespace in the documents into a single space.
docs <- tm::tm_map(docs, stripWhitespace)
writeLines(as.character(docs[1]))## list(list(content = c("", "fellowcitizens senate house representatives", "among vicissitudes incident life event filled greater anxieties notification transmitted order received th day present month one hand summoned country whose voice can never hear veneration love retreat chosen fondest predilection flattering hopes immutable decision asylum declining years— retreat rendered every day necessary well dear addition habit inclination frequent interruptions health gradual waste committed time hand magnitude difficulty trust voice country called sufficient awaken wisest experienced citizens distrustful scrutiny qualifications overwhelm despondence one inheriting inferior endowments nature unpracticed duties civil administration peculiarly conscious deficiencies conflict emotions dare aver faithful study collect duty just appreciation every circumstance might affected dare hope executing task much swayed grateful remembrance former instances affectionate sensibility transcendent proof confidence fellowcitizens thence little consulted incapacity well disinclination weighty untried cares error will palliated motives mislead see app note consequences judged country share partiality originated",
## " impressions obedience public summons repaired present station peculiarly improper omit first official act fervent supplications almighty rules universe presides councils nations whose providential aids can supply every human defect benediction may consecrate liberties happiness people united states government instituted essential purposes may enable every instrument employed administration execute success functions allotted charge tendering homage great author every public private good assure expresses sentiments less fellowcitizens large less either people can bound acknowledge adore invisible hand conducts affairs men united states every step advanced character independent nation seems distinguished token providential agency important revolution just accomplished system united government tranquil deliberations voluntary consent many distinct communities event resulted can compared means governments established without return pious gratitude along humble anticipation future blessings past seem presage reflections arising present crisis forced strongly mind suppressed will join trust thinking none influence proceedings new free government can auspiciously commence",
## " article establishing executive department made duty president recommend consideration measures shall judge necessary expedient circumstances now meet will acquit entering subject refer great constitutional charter assembled defining powers designates objects attention given will consistent circumstances far congenial feelings actuate substitute place recommendation particular measures tribute due talents rectitude patriotism adorn characters selected devise adopt honorable qualifications behold surest pledges one side local prejudices attachments separate views party animosities will misdirect comprehensive equal eye watch great assemblage communities interests another foundation national policy will laid pure immutable principles private morality preeminence free government exemplified attributes can win affections citizens command respect world dwell prospect every satisfaction ardent love country can inspire since truth thoroughly established exists economy course nature indissoluble union virtue happiness duty advantage genuine maxims honest magnanimous policy solid rewards public prosperity felicity since less persuaded propitious smiles heaven can never expected nation disregards eternal rules order right heaven ordained since preservation sacred fire liberty destiny republican model government justly considered perhaps deeply finally staked experiment entrusted hands american people",
## "besides ordinary objects submitted care will remain judgment decide far exercise occasional power delegated fifth article constitution rendered expedient present juncture nature objections urged system degree inquietude given birth instead undertaking particular recommendations subject guided lights derived official opportunities shall give way entire confidence discernment pursuit public good assure whilst carefully avoid every alteration might endanger benefits united effective government await future lessons experience reverence characteristic rights freemen regard public harmony will sufficiently influence deliberations question far former can impregnably fortified latter safely advantageously promoted",
## " foregoing observations one add will properly addressed house representatives concerns will therefore brief possible first honored call service country eve arduous struggle liberties light contemplated duty required renounce every pecuniary compensation resolution instance departed still impressions produced must decline inapplicable share personal emoluments may indispensably included permanent provision executive department must accordingly pray pecuniary estimates station placed may continuance limited actual expenditures public good may thought require",
## " thus imparted sentiments awakened occasion brings us together shall take present leave without resorting benign parent human race humble supplication since pleased favor american people opportunities deliberating perfect tranquillity dispositions deciding unparalleled unanimity form government security union advancement happiness divine blessing may equally conspicuous enlarged views temperate consultations wise measures success government must depend", ""), meta = list(author = character(0), datetimestamp = list(
## sec = 13.7956876754761, min = 2, hour = 23, mday = 2, mon = 0, year = 123, wday = 1, yday = 1, isdst = 0), description = character(0), heading = character(0), id = character(0), language = character(0), origin = character(0))))
## list()
## list()
nrow(df)## [1] 59
#Elnaz
# Write each pre-processed speech to its own text file, named
# "<year>-<president>.txt", inside ./data/pre_processed/.
# Row i of `df` supplies the file name and docs[[i]] supplies the text, so the
# corpus is assumed to be in the same order as the rows of `df`.
out_dir <- "./data/pre_processed"
# writeLines() cannot create missing directories, so make sure the target exists.
dir.create(out_dir, recursive = TRUE, showWarnings = FALSE)
for (i in seq_len(nrow(df))) { # seq_len() does nothing when df has zero rows
  file_name <- paste0(df$year[i], "-", df$president[i], ".txt")
  loc <- file.path(out_dir, file_name)
  writeLines(as.character(docs[[i]]), loc)
}

# Document-term matrix of the pre-processed corpus.
dtm <- tm::DocumentTermMatrix(docs)
dtm ## <<DocumentTermMatrix (documents: 59, terms: 9495)>>
## Non-/sparse entries: 40113/520092
## Sparsity : 93%
## Maximal term length: 23
## Weighting : term frequency (tf)
Store the transpose of the matrix as well (a term-document matrix, with terms as rows and documents as columns):
tdm <- tm::TermDocumentMatrix(docs)
tdm ## <<TermDocumentMatrix (terms: 9495, documents: 59)>>
## Non-/sparse entries: 40113/520092
## Sparsity : 93%
## Maximal term length: 23
## Weighting : term frequency (tf)
# Convert the document-term matrix to a dense matrix once and reuse it below,
# instead of calling as.matrix(dtm) twice.
m <- as.matrix(dtm)
# Total number of occurrences of each term, summed over all documents.
freq <- colSums(m)
length(freq) ## [1] 9495
# Index permutation that sorts the terms by increasing total frequency.
ord <- order(freq)
dim(m) ## [1] 59 9495
Optionally, save the matrix to disk as a CSV file:
# Disabled: uncomment to export the dense matrix `m` to a CSV file.
#write.csv(m, file="DocumentTermMatrix.csv")
# Start by removing sparse terms: drop every term that is missing from 20% or
# more of the documents, so each remaining term has less than 20% empty entries.
dtms <- removeSparseTerms(dtm, 0.2)
dtms## <<DocumentTermMatrix (documents: 59, terms: 25)>>
## Non-/sparse entries: 1298/177
## Sparsity : 12%
## Maximal term length: 10
## Weighting : term frequency (tf)
freq <- colSums(as.matrix(dtm))
Least frequent:
head(table(freq), 20) ## freq
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
## 4154 1404 788 535 359 291 226 177 162 146 112 85 82 90 43 59
## 17 18 19 20
## 49 35 47 38
The top number is the frequency with which words appear and the bottom number reflects how many words appear that frequently.
Most frequent:
tail(table(freq), 40) ## freq
## 134 137 138 139 142 143 147 150 155 157 159 171 179 184 185 198 207 210 221 222
## 1 1 2 1 1 1 1 1 1 2 1 1 1 2 1 1 1 1 1 1
## 227 232 240 250 256 267 302 303 304 314 318 337 341 346 373 374 488 567 576 942
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
View the 20 most frequent terms in the full document-term matrix, sorted by decreasing frequency:
freq <- sort(colSums(as.matrix(dtm)), decreasing=TRUE)
freq |> head(20)## will people government can upon must great
## 942 576 567 488 374 373 346
## may states world shall country nation every
## 341 337 318 314 304 303 302
## one peace new power now public
## 267 256 250 240 232 227
Identify all terms that appear at least 50 times (only the first 20 are shown):
findFreqTerms(dtm, lowfreq=50) |> head(20)## [1] "act" "action" "administration" "also"
## [5] "always" "america" "american" "americans"
## [9] "among" "another" "authority" "become"
## [13] "believe" "best" "better" "beyond"
## [17] "business" "called" "can" "cause"
Another approach to perform the same task:
wf <- data.frame(word=names(freq), freq=freq)
head(wf) ## word freq
## will will 942
## people people 576
## government government 567
## can can 488
## upon upon 374
## must must 373
# Bar chart of every term that occurs more than 100 times in total, with the
# bars ordered from the most to the least frequent term.
p <- ggplot(subset(wf, freq > 100), aes(x = reorder(word, -freq), y = freq)) +
  geom_col() + # equivalent to geom_bar(stat = "identity")
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) # tilt the term labels
p

# Terms associated with "government" and with "states", using a correlation
# limit of 0.75 on the full document-term matrix.
tm::findAssocs(dtm, c("government", "states"), corlimit = 0.75) ## $government
## system
## 0.79
##
## $states
## powers sections united
## 0.77 0.76 0.76
findAssocs(dtms, "government", corlimit=0.70) # specifying a correlation limit of 0.70 ## $government
## states
## 0.75
Word Clouds
Plot words that occur at least 20 times.
Colorized version:
set.seed(142)
wordcloud::wordcloud(names(freq), freq, min.freq=20, scale=c(5, .1), colors=brewer.pal(6, "Dark2"))
Plot the 100 most frequent words.
set.seed(142) # fixed seed so the word cloud layout is reproducible
dark2 <- brewer.pal(6, "Dark2")
wordcloud::wordcloud(
  names(freq), freq,
  max.words = 100, rot.per = 0.2, colors = dark2
)

# Hierarchical clustering of the terms kept in `dtms`: t() puts the terms in
# rows, so dist() measures the Euclidean distance between terms.
d <- dist(t(dtms), method = "euclidean")
fit <- hclust(d = d, method = "complete") # for a different look try substituting: method="ward.D"
fit ##
## Call:
## hclust(d = d, method = "complete")
##
## Cluster method : complete
## Distance : euclidean
## Number of objects: 25
# Dendrogram of the hierarchical clustering.
plot(fit, hang = -1)

# Redraw the dendrogram and outline a 6-cluster cut of the tree.
plot.new()
plot(fit, hang = -1)
groups <- cutree(fit, k = 6) # "k=" defines the number of clusters you are using
rect.hclust(fit, k = 6, border = "red") # draw dendrogram with red borders around the 6 clusters

# k-means clustering of the terms, run on the matrix of pairwise Euclidean
# distances between terms. The distances are computed once and shared by both
# runs below.
d <- dist(t(dtms), method = "euclidean")

# kmeans() starts from randomly chosen centres, so fix the seed to make the
# cluster assignments (and the plots) reproducible.
set.seed(142)
kfit <- kmeans(d, 2)
clusplot(as.matrix(d), kfit$cluster,
  color = TRUE, shade = TRUE, labels = 2, lines = 0
)

set.seed(142)
kfit <- kmeans(d, 4)
clusplot(as.matrix(d), kfit$cluster,
  color = TRUE, shade = TRUE, labels = 2, lines = 0
)

# TODO: Perform doc similarity using the textreuse library, analyze and visualize results.
#loc <- "/home/hamed/Documents/R/MADS-NLP/data/texts"
#docs <- tm::VCorpus(DirSource(loc))
# Directory that holds the pre-processed speeches.
loc <- "./data/pre_processed/" # Elnaz

# Load the files in that directory into a textreuse corpus.
corpus <- TextReuseCorpus(dir = loc)

# Jaccard similarity for every pair of documents in the corpus.
comparisons <- pairwise_compare(corpus, jaccard_similarity)

# Reshape the pairwise results into a data frame with one row per document
# pair (columns a, b and score).
compare_df <- pairwise_candidates(comparisons)
compare_df <- as.data.frame(
  compare_df,
  col.names = names(compare_df)
)

# Most similar pairs first.
compare_df <- compare_df[order(compare_df$score, decreasing = TRUE), ]
compare_df |> head(20)## a b score
## 38 1789-George Washington 1941-Franklin D. Roosevelt 0.008675079
## 879 1857-James Buchanan 1973-Richard Nixon 0.006329114
## 182 1801-Thomas Jefferson 1845-James K. Polk 0.006201044
## 122 1797-John Adams 1825-John Quincy Adams 0.006193229
## 1621 1969-Richard Nixon 1973-Richard Nixon 0.005293806
## 1234 1897-William McKinley 1973-Richard Nixon 0.004983980
## 1697 2001-George W. Bush 2005-George W. Bush 0.004841313
## 860 1857-James Buchanan 1897-William McKinley 0.004230885
## 1400 1921-Warren G. Harding 1977-Jimmy Carter 0.003880983
## 1703 2005-George W. Bush 2013-Barack Obama 0.003807711
## 540 1829-Andrew Jackson 1849-Zachary Taylor 0.003759398
## 1680 1989-George Bush 2009-Barack Obama 0.003473730
## 640 1837-Martin van Buren 1877-Rutherford B. Hayes 0.003238342
## 1594 1961-John F. Kennedy 1973-Richard Nixon 0.003211304
## 1700 2001-George W. Bush 2017-Donald J. Trump 0.003211304
## 192 1801-Thomas Jefferson 1885-Grover Cleveland 0.003095975
## 1527 1945-Franklin D. Roosevelt 1969-Richard Nixon 0.003093581
## 724 1845-James K. Polk 1857-James Buchanan 0.003040354
## 1140 1885-Grover Cleveland 1981-Ronald Reagan 0.003021148
## 1216 1897-William McKinley 1901-William McKinley 0.002961500
#Najada
corpus## TextReuseCorpus
## Number of documents: 59
## hash_func : hash_string
## tokenizer : tokenize_ngrams
writeLines(as.character(corpus[1]))## list(`1789-George Washington` = list(content = "\nfellowcitizens senate house representatives\namong vicissitudes incident life event filled greater anxieties notification transmitted order received th day present month one hand summoned country whose voice can never hear veneration love retreat chosen fondest predilection flattering hopes immutable decision asylum declining years— retreat rendered every day necessary well dear addition habit inclination frequent interruptions health gradual waste committed time hand magnitude difficulty trust voice country called sufficient awaken wisest experienced citizens distrustful scrutiny qualifications overwhelm despondence one inheriting inferior endowments nature unpracticed duties civil administration peculiarly conscious deficiencies conflict emotions dare aver faithful study collect duty just appreciation every circumstance might affected dare hope executing task much swayed grateful remembrance former instances affectionate sensibility transcendent proof confidence fellowcitizens thence little consulted incapacity well disinclination weighty untried cares error will palliated motives mislead see app note consequences judged country share partiality originated\n impressions obedience public summons repaired present station peculiarly improper omit first official act fervent supplications almighty rules universe presides councils nations whose providential aids can supply every human defect benediction may consecrate liberties happiness people united states government instituted essential purposes may enable every instrument employed administration execute success functions allotted charge tendering homage great author every public private good assure expresses sentiments less fellowcitizens large less either people can bound acknowledge adore invisible hand conducts affairs men united states every step advanced character independent nation seems distinguished token providential 
agency important revolution just accomplished system united government tranquil deliberations voluntary consent many distinct communities event resulted can compared means governments established without return pious gratitude along humble anticipation future blessings past seem presage reflections arising present crisis forced strongly mind suppressed will join trust thinking none influence proceedings new free government can auspiciously commence\n article establishing executive department made duty president recommend consideration measures shall judge necessary expedient circumstances now meet will acquit entering subject refer great constitutional charter assembled defining powers designates objects attention given will consistent circumstances far congenial feelings actuate substitute place recommendation particular measures tribute due talents rectitude patriotism adorn characters selected devise adopt honorable qualifications behold surest pledges one side local prejudices attachments separate views party animosities will misdirect comprehensive equal eye watch great assemblage communities interests another foundation national policy will laid pure immutable principles private morality preeminence free government exemplified attributes can win affections citizens command respect world dwell prospect every satisfaction ardent love country can inspire since truth thoroughly established exists economy course nature indissoluble union virtue happiness duty advantage genuine maxims honest magnanimous policy solid rewards public prosperity felicity since less persuaded propitious smiles heaven can never expected nation disregards eternal rules order right heaven ordained since preservation sacred fire liberty destiny republican model government justly considered perhaps deeply finally staked experiment entrusted hands american people\nbesides ordinary objects submitted care will remain judgment decide far exercise occasional power delegated fifth article 
constitution rendered expedient present juncture nature objections urged system degree inquietude given birth instead undertaking particular recommendations subject guided lights derived official opportunities shall give way entire confidence discernment pursuit public good assure whilst carefully avoid every alteration might endanger benefits united effective government await future lessons experience reverence characteristic rights freemen regard public harmony will sufficiently influence deliberations question far former can impregnably fortified latter safely advantageously promoted\n foregoing observations one add will properly addressed house representatives concerns will therefore brief possible first honored call service country eve arduous struggle liberties light contemplated duty required renounce every pecuniary compensation resolution instance departed still impressions produced must decline inapplicable share personal emoluments may indispensably included permanent provision executive department must accordingly pray pecuniary estimates station placed may continuance limited actual expenditures public good may thought require\n thus imparted sentiments awakened occasion brings us together shall take present leave without resorting benign parent human race humble supplication since pleased favor american people opportunities deliberating perfect tranquillity dispositions deciding unparalleled unanimity form government security union advancement happiness divine blessing may equally conspicuous enlarged views temperate consultations wise measures success government must depend\n",
## tokens = NULL, hashes = c(587687171, 1400508662, -1528904364, -1176008363, 2104028628, 242661590, -1305151396, -1007533842, 1537695272, 716812531, 1724523801, -2029856456, -1559951769, 248799671, 751783772, 1056616327, -365629900, 2028634587, -1779240488, 584989554, 966616660, 2023068956, -461585151, 1235627038, 1461987856, 2109740839, 296398647, 1516495441, -1759252034, -950716712, -1700284989, 474818159, -2145645941, 959005317, 1792189935, 526937258, 1320515610, -1870374867, 533049850, -1667500355,
## -2013349359, 1181689723, 2091170364, 1446288897, -1291093787, 678659832, -66358167, -954065810, -1186750319, 397886169, 1947985790, 1787954948, -1376396193, 2024928609, -1993756363, -2035282470, 1709616450, -1142690907, -87400543, -1780932891, 1275769164, -1238247474, -1795675854, 2135756502, -147093800, -555192427, -1320760355, 649342408, 443548482, 61830623, 198592979, -603050151, -1728163943, -1792610003, -51009840, -1302594033, 996085435, 1526349563, 678044707, 453523929, 586226657, -1699176042,
## 169265343, -515095572, -395221959, -99484685, -294414424, -1126964556, -1403486852, 816456272, 733257215, 920525667, -345995926, -1823464731, 1147597892, 1354415655, 1854078669, -1621228001, 1927301929, -1345522747, 1038471858, -244681622, -892578995, 72381548, 1270857288, 849915209, 993045926, -1941284721, 1545847062, 38047156, 698790758, 1074535113, -113479895, -1989511143, -1755919354, 1745815471, 2025787534, -1278537472, 1513749684, -723464549, 1796515351, -512470495, -1772972627, -103986019,
## -541837969, -332140653, -41938622, 935988054, -287721816, 226643687, 254446636, -1205363655, -81339928, 1916956172, -1114498525, 1973504928, -934931095, 708855922, -1888749922, 2087149692, 2144294272, 1593169418, -1927058696, -1777509480, -1664836722, -1020860280, -1502327335, -968650868, -649366632, -494834276, -2024279898, 410558282, 1189368521, 1323869532, 1338465938, 467733909, -1018544674, 1969474929, -371125453, -9171236, 1519063877, 1286598646, 1424388929, 186099284, -1770712336, -1443119936,
## 831240281, 167302766, 1386444513, 2005304832, -202627190, -834445537, -1368329272, 380808289, -551303023, 1934873975, -154147308, 945961261, 625181640, 1874442376, -1073232328, -193499807, 714424653, -515533330, 2128767955, 562009042, 1404304799, 1619715984, -946751896, -869481633, 875871073, -1770942317, -634084209, 1231605490, -495036420, -2068560466, 2031343882, 536520523, 1063547700, 1637046830, -1286846358, -1211670658, -757319013, 1335579505, -645935361, 2135177199, -541915347, 1330624931,
## 636932188, 1522355160, 1258497539, 1048273565, 557472728, -1156793337, 1297653750, 1853178493, 37273762, -956435664, -277103692, 770234992, -1152061625, 299770228, -180120082, 965657393, 1427881378, -121825537, -593430032, 1120961923, 1853608939, -227009182, -545589378, 604343275, 54439085, -1551532025, -207087041, 66678883, 1925704464, -2045922252, 129959685, 1861493628, -1964043475, 942061079, 987665613, -916763261, -1163102381, -884820054, 496757108, -1440161153, -128751657, 2126468541, 506779190,
## -690778854, -470997245, -1596300445, -668638245, -1325980228, -953995496, 1876751852, -700155755, 564026092, -1567717831, -987311879, 1971826499, 428076561, -1967041995, 1994730386, -709766214, -709741389, 2140793596, 1297607958, 1901773077, -1117184829, 305571156, 359973086, -111077089, 959816120, -984768994, -280513398, -549216946, -299548882, 847788327, -791141545, 721879760, 461320220, -1735365414, 65188856, 1592314897, 728532517, 1870809411, -93141376, -1694462618, 467593226, 923382187,
## 1985266923, 1998388686, -1345495541, 819029135, 2001137044, -204338349, 1315007312, 1149535113, -1738771752, 57956568, -1725259379, 383530898, -812394143, -291585477, -1781070278, -38337638, -2081460016, 2069817247, 1162777367, 1368678671, 712542515, -140075497, 1506746088, 384065393, -405588973, 1902955964, 202742759, -1045686688, -753651315, 1110107017, -1921545089, 1652269712, -2095806876, -1480277942, 585069962, 776673625, 861096816, 983589687, -1509598572, 841473833, -2101045069, -1105440689,
## 1116502339, -589244334, -627210225, 1558977457, -1187165814, -2072325718, 625008151, 1412345682, 119942981, 1353418914, -582658888, -1354122991, 729582651, 805068727, -380399197, -985862831, -138779692, -2016268868, -816225859, -1513153725, -1760765814, -1232844426, -1439682084, 1949345750, -1562470364, 1150362467, -109966971, 1474149194, 53748794, -1358513161, 1294174501, -109789284, 1235827108, 151781260, -1401449250, -308703952, -1352487516, 320940982, -2142548884, 8175128, -396966452, -1118882259,
## 207823984, -1252670205, -1932639948, -2076541997, 995440370, -16760181, -749012868, -1424971204, 2095115152, 2130148753, 355575399, 212603945, -715715317, -1039787095, 1548652465, -926828080, 209190793, 389547656, 770650920, 519071644, 1721560869, -2055550679, 995539959, 1349426469, -272386170, -1054033410, -1388708457, 2017121382, -1302657021, 976160707, -915577174, -1401844529, -1058969706, 430503161, -1586187242, -1676540359, -443313598, 1348543069, 1304320134, 861286452, 72735592, -1627571465,
## 1134277041, -1959208081, 864590062, 574606785, -1112220517, -1428687595, -1783787317, -1400688237, -1450441702, -2124775292, -1807714059, 399195817, -277514201, 1660255958, 592346957, -1946457483, -432533824, -1974694602, 310646559, -802576598, -974357456, 317468672, 612727181, -1496191349, -692697901, -874521377, 786167592, -2122350516, 1062919638, -708757281, -1461388590, 1056073807, 1340225372, 781897539, -1973499855, -1795965144, 965418590, 746919453, -1116538561, -1785986764, -1649185227,
## 382250480, -436911901, 503703024, 1842305762, 1425240121, 1642198257, 1840809897, -1086012836, -721967427, 1413207480, 743667063, 402880650, 1393967382, 571580618, 282186835, -191395883, 1866940704, -1725704000, 2091777396, 614036410, -1684721682, -1608505298, -1223100962, 975572501, -1892854512, 1448058632, 1673360040, 1511845018, 950228943, -1644886043, -672037095, -1466344035, 869303991, -1792986760, -1219066159, -1553177641, 1211158170, 2047196414, -480494287, 1642867115, -1039588290, -2098084322,
## 580935991, 1062508689, 1947468937, -293209911, 316754159, -2119615852, 1544563932, -1272015818, 1290136501, 1752833319, -628275304, -1957030177, -2123663366, -1854701749, 1538253965, -1276970472, 954249127, 490002465, 366039354, 446832742, 2097967162, 1628197313, -2106802246, -745737230, -1773610017, -1509061043, 1969614321, 1667617418, -1775572338, -61528855, -881661575, 758075731, 808992127, 407763413, -1685772086, 88988503, -485215697, 1224660249, 1262754192, 1316360811, 730725556, -1895962040,
## 739632466, 112190582, -760569821, -696357463, 321222347, -932152648, -181125678, 1331213186, -37250300, 1785451918, 1984911491, 1049752826, 171265737, 648581895, 84716172, -1935422683, -1887938612, 1730609164, -891045666, 640952152, 675967154, 1025210682, -1590612499, 241213893, -1779016761, -772953297, 371142435, -725054360, -1504138522, 806480980, 966180365, -798098067, 1556657824, 547101515, 1446227486, 647201881, 2078758924, 1674319440, -676763924, 7601371, 308556602, -2121544895, 111915176,
## 754845568, 418118757, 1097402806, 1251110189, -1332834031, 894982345, 542760788, 310690177, 2011239982, -1556430381, 2022761016, -1006829461, -1346320224, -508244653, 843396633, -2107491768, 661235271, 1594638859, -847261, -1861551742, 1991168055, -578164503, -1391384292, -2112455287, 575323136, -506097748, 2027091026, 684144166, 906872019, -1791087308, -1991936115, 919794053, 1345727137, -1368100382, 1416044718, 1301064130, 1932237702, 792666683, -2028606752, -837382500, -1555846557, 844852165,
## -821236830, -1225523181, 564712667, -967978313, -391403513, 532628575, -654336907, 1319076328, -1284386717, -1450290552, 1976147344, -1875527371, -1903869155, 613479004, -1383717481, -802577061, 1592465726, 95855341, 1720498184, -68535017, 609372993, 440864016, -1882282981, 220356884, 323636372, -911290700, 1302715528, 885234303, -228890949, 1232542523, 1752449529, 48313893), minhashes = NULL, meta = list(file = "./data/pre_processed//1789-George Washington.txt", hash_func = "hash_string", id = "1789-George Washington",
## minhash_func = NULL, tokenizer = "tokenize_ngrams")))
## list(hash_func = "hash_string", tokenizer = "tokenize_ngrams")
# Keep only the first 30 comparisons: with every pair plotted the chart has
# too many points to be readable. head() is used instead of `[1:30, ]` so that
# a compare_df with fewer than 30 rows is not padded with all-NA rows.
compare_df_viz <- head(compare_df, 30)

library(htmltools)
library(plotly)

# Interactive 3-D scatter plot with columns `a` and `b` on the x/y axes and
# `score` on the z axis, one marker per row of compare_df_viz.
fig <- plot_ly(compare_df_viz, x = ~a, y = ~b, z = ~score) %>%
  add_markers() %>%
  layout(
    scene = list(
      xaxis = list(title = "a"),
      yaxis = list(title = "b"),
      zaxis = list(title = "score")
    )
  )
fig
Cosine Similarity
# Pairwise cosine distance between the terms (columns) of dtms:
#   1 - <x_i, x_j> / sqrt(sum(x_i^2) * sum(x_j^2))
# The intermediates live inside local() so only cosine_dist_mat is created.
cosine_dist_mat <- local({
  term_dot_products <- crossprod_simple_triplet_matrix(dtms)
  term_sq_sums <- col_sums(dtms^2)
  1 - term_dot_products / sqrt(term_sq_sums %*% t(term_sq_sums))
})
cosine_dist_mat
## Terms
## Terms can country every free future good
## can 0.0000000 0.3013170 0.3196871 0.3970037 0.3110346 0.2664495
## country 0.3013170 0.0000000 0.2134492 0.3652111 0.4106707 0.3386620
## every 0.3196871 0.2134492 0.0000000 0.4134426 0.2906490 0.3322699
## free 0.3970037 0.3652111 0.4134426 0.0000000 0.4161580 0.3816995
## future 0.3110346 0.4106707 0.2906490 0.4161580 0.0000000 0.2954737
## good 0.2664495 0.3386620 0.3322699 0.3816995 0.2954737 0.0000000
## government 0.2579230 0.2047350 0.2285687 0.4167750 0.4221402 0.3390539
## great 0.3042754 0.2390763 0.2096882 0.4302002 0.3422671 0.3008674
## just 0.3563657 0.3436575 0.2022730 0.4530560 0.2901312 0.4011886
## life 0.3891276 0.5365314 0.4278774 0.3598682 0.3989592 0.3532778
## may 0.2454736 0.2091903 0.2500473 0.3684674 0.4220194 0.2937487
## must 0.2257792 0.3539029 0.3447757 0.3947449 0.2996883 0.3999765
## nation 0.2810751 0.4035222 0.3253524 0.5014245 0.2368747 0.2633727
## nations 0.3268656 0.3906216 0.4437372 0.3606915 0.3747685 0.4074177
## new 0.3658506 0.6269561 0.4465935 0.5652935 0.3712807 0.4494017
## now 0.2702416 0.3961642 0.3232435 0.4636810 0.3613852 0.3781249
## one 0.1981736 0.2578042 0.2801983 0.3996113 0.3642686 0.3632012
## people 0.1876246 0.2312582 0.2164757 0.3129386 0.3493964 0.2671543
## power 0.3523297 0.3045336 0.3592473 0.4283691 0.4823880 0.3825696
## shall 0.3109780 0.3616201 0.3589731 0.3534992 0.4197767 0.3228838
## states 0.3775741 0.2668935 0.2919410 0.3686965 0.5228231 0.4072595
## time 0.2443284 0.3741071 0.2302306 0.3895374 0.1702277 0.3522756
## united 0.3648980 0.2623717 0.2403582 0.3262696 0.4483419 0.3755336
## will 0.1884575 0.2630200 0.2057079 0.3901750 0.2137291 0.2273583
## world 0.3037327 0.5509632 0.5317638 0.4174611 0.3732763 0.4578542
## Terms
## Terms government great just life may must
## can 0.2579230 0.3042754 0.3563657 0.3891276 0.2454736 0.2257792
## country 0.2047350 0.2390763 0.3436575 0.5365314 0.2091903 0.3539029
## every 0.2285687 0.2096882 0.2022730 0.4278774 0.2500473 0.3447757
## free 0.4167750 0.4302002 0.4530560 0.3598682 0.3684674 0.3947449
## future 0.4221402 0.3422671 0.2901312 0.3989592 0.4220194 0.2996883
## good 0.3390539 0.3008674 0.4011886 0.3532778 0.2937487 0.3999765
## government 0.0000000 0.2538097 0.3284080 0.5131929 0.1897588 0.3346813
## great 0.2538097 0.0000000 0.2831543 0.4549428 0.2135467 0.3918152
## just 0.3284080 0.2831543 0.0000000 0.4844806 0.3634034 0.4016780
## life 0.5131929 0.4549428 0.4844806 0.0000000 0.5740470 0.3630353
## may 0.1897588 0.2135467 0.3634034 0.5740470 0.0000000 0.3689675
## must 0.3346813 0.3918152 0.4016780 0.3630353 0.3689675 0.0000000
## nation 0.4031149 0.3087375 0.3363669 0.2829575 0.4428263 0.3346222
## nations 0.4886689 0.4644433 0.4336468 0.3544381 0.4583313 0.4073543
## new 0.5282904 0.4976656 0.4564718 0.3729945 0.6337560 0.3379381
## now 0.3539362 0.3478060 0.4525142 0.4819565 0.3803697 0.3446933
## one 0.2253451 0.3185289 0.4083071 0.5308138 0.2428221 0.3131816
## people 0.1561980 0.2122676 0.3205800 0.4053061 0.1893500 0.2844634
## power 0.3144943 0.3182054 0.5045919 0.6500729 0.1761881 0.5603424
## shall 0.2626669 0.3262349 0.3740483 0.4798238 0.2220753 0.4286209
## states 0.1503715 0.2484336 0.3144041 0.6534657 0.1793855 0.5053126
## time 0.3544366 0.2798467 0.2648991 0.3886612 0.3363113 0.2596588
## united 0.2204035 0.1972449 0.3001157 0.5073253 0.2731160 0.4412357
## will 0.2554974 0.2392831 0.2468342 0.3862783 0.2956313 0.2099962
## world 0.5681668 0.5193071 0.5327552 0.3016199 0.5944010 0.2184216
## Terms
## Terms nation nations new now one people
## can 0.2810751 0.3268656 0.3658506 0.2702416 0.1981736 0.1876246
## country 0.4035222 0.3906216 0.6269561 0.3961642 0.2578042 0.2312582
## every 0.3253524 0.4437372 0.4465935 0.3232435 0.2801983 0.2164757
## free 0.5014245 0.3606915 0.5652935 0.4636810 0.3996113 0.3129386
## future 0.2368747 0.3747685 0.3712807 0.3613852 0.3642686 0.3493964
## good 0.2633727 0.4074177 0.4494017 0.3781249 0.3632012 0.2671543
## government 0.4031149 0.4886689 0.5282904 0.3539362 0.2253451 0.1561980
## great 0.3087375 0.4644433 0.4976656 0.3478060 0.3185289 0.2122676
## just 0.3363669 0.4336468 0.4564718 0.4525142 0.4083071 0.3205800
## life 0.2829575 0.3544381 0.3729945 0.4819565 0.5308138 0.4053061
## may 0.4428263 0.4583313 0.6337560 0.3803697 0.2428221 0.1893500
## must 0.3346222 0.4073543 0.3379381 0.3446933 0.3131816 0.2844634
## nation 0.0000000 0.3911856 0.2389233 0.3628019 0.3765086 0.2904789
## nations 0.3911856 0.0000000 0.4748058 0.4749950 0.5669782 0.3968177
## new 0.2389233 0.4748058 0.0000000 0.4120310 0.4726255 0.4367703
## now 0.3628019 0.4749950 0.4120310 0.0000000 0.3042476 0.2278992
## one 0.3765086 0.5669782 0.4726255 0.3042476 0.0000000 0.2424710
## people 0.2904789 0.3968177 0.4367703 0.2278992 0.2424710 0.0000000
## power 0.4872611 0.6527536 0.6956918 0.5153625 0.2507013 0.2587369
## shall 0.4901205 0.4562546 0.5808853 0.3135053 0.4146651 0.2428110
## states 0.5151173 0.5438493 0.6967915 0.3601278 0.3113200 0.2187172
## time 0.2190216 0.4210667 0.2539761 0.2155948 0.2571156 0.2440165
## united 0.4477340 0.4010402 0.6261884 0.3173412 0.3682137 0.2665430
## will 0.2334926 0.3713594 0.3063816 0.1803612 0.2196700 0.1838601
## world 0.3381502 0.2622823 0.2473251 0.4499794 0.4556484 0.4291283
## Terms
## Terms power shall states time united will
## can 0.3523297 0.3109780 0.3775741 0.2443284 0.3648980 0.1884575
## country 0.3045336 0.3616201 0.2668935 0.3741071 0.2623717 0.2630200
## every 0.3592473 0.3589731 0.2919410 0.2302306 0.2403582 0.2057079
## free 0.4283691 0.3534992 0.3686965 0.3895374 0.3262696 0.3901750
## future 0.4823880 0.4197767 0.5228231 0.1702277 0.4483419 0.2137291
## good 0.3825696 0.3228838 0.4072595 0.3522756 0.3755336 0.2273583
## government 0.3144943 0.2626669 0.1503715 0.3544366 0.2204035 0.2554974
## great 0.3182054 0.3262349 0.2484336 0.2798467 0.1972449 0.2392831
## just 0.5045919 0.3740483 0.3144041 0.2648991 0.3001157 0.2468342
## life 0.6500729 0.4798238 0.6534657 0.3886612 0.5073253 0.3862783
## may 0.1761881 0.2220753 0.1793855 0.3363113 0.2731160 0.2956313
## must 0.5603424 0.4286209 0.5053126 0.2596588 0.4412357 0.2099962
## nation 0.4872611 0.4901205 0.5151173 0.2190216 0.4477340 0.2334926
## nations 0.6527536 0.4562546 0.5438493 0.4210667 0.4010402 0.3713594
## new 0.6956918 0.5808853 0.6967915 0.2539761 0.6261884 0.3063816
## now 0.5153625 0.3135053 0.3601278 0.2155948 0.3173412 0.1803612
## one 0.2507013 0.4146651 0.3113200 0.2571156 0.3682137 0.2196700
## people 0.2587369 0.2428110 0.2187172 0.2440165 0.2665430 0.1838601
## power 0.0000000 0.4360201 0.3234218 0.4166865 0.3998290 0.4231578
## shall 0.4360201 0.0000000 0.2449347 0.3897523 0.3418936 0.2887096
## states 0.3234218 0.2449347 0.0000000 0.4248667 0.1442522 0.3265694
## time 0.4166865 0.3897523 0.4248667 0.0000000 0.3697408 0.1750546
## united 0.3998290 0.3418936 0.1442522 0.3697408 0.0000000 0.3062891
## will 0.4231578 0.2887096 0.3265694 0.1750546 0.3062891 0.0000000
## world 0.7102814 0.5616665 0.7295113 0.3166116 0.5751415 0.3533194
## Terms
## Terms world
## can 0.3037327
## country 0.5509632
## every 0.5317638
## free 0.4174611
## future 0.3732763
## good 0.4578542
## government 0.5681668
## great 0.5193071
## just 0.5327552
## life 0.3016199
## may 0.5944010
## must 0.2184216
## nation 0.3381502
## nations 0.2622823
## new 0.2473251
## now 0.4499794
## one 0.4556484
## people 0.4291283
## power 0.7102814
## shall 0.5616665
## states 0.7295113
## time 0.3166116
## united 0.5751415
## will 0.3533194
## world 0.0000000
# Najada: cluster the terms using the cosine distance matrix computed above.
library(cluster)

# kmeans() chooses its initial centres at random; fix the seed so that the
# cluster assignments and the plot are the same on every run.
set.seed(123)

# NOTE(review): this takes Euclidean distances between the rows of a matrix
# that already holds cosine distances, then runs k-means on that distance
# object -- confirm this is intended rather than, e.g., clustering on
# as.dist(cosine_dist_mat) directly.
d <- dist(t(cosine_dist_mat), method = "euclidian")
kfit <- kmeans(d, centers = 4)
clusplot(
  as.matrix(d), kfit$cluster,
  color = TRUE, shade = TRUE, labels = 2, lines = 0
)
# What are we trying to do here?
dtms[, 1]
## <<DocumentTermMatrix (documents: 59, terms: 1)>>
## Non-/sparse entries: 56/3
## Sparsity : 5%
## Maximal term length: 3
## Weighting : term frequency (tf)

# Cosine similarity between the first two terms (columns 1 and 2 of dtms).
# Each slice is a single term column (documents x 1), so the dot product and
# the norms must be taken over documents with crossprod / col_sums;
# tcrossprod / row_sums on these slices would instead give a
# documents x documents matrix of ratios, with NaN/Inf wherever a count is 0.
# The result is a 1 x 1 matrix equal to 1 - cosine_dist_mat[1, 2].
cosine_sim <- crossprod_simple_triplet_matrix(dtms[, 1], dtms[, 2]) /
  sqrt(col_sums(dtms[, 1]^2) * col_sums(dtms[, 2]^2))
# cosine_sim
# TODO: writing